#encoding:utf8
'''
Controller class that crawls pages and follows their links.
'''
def urlProcess(url):
    """Return ``url`` appended to the novel.hongxiu.com site root.

    ``url`` is formatted with ``%s`` and glued directly onto the root, so no
    separator is inserted and no validation is performed.
    """
    siteRoot='http://novel.hongxiu.com'
    return siteRoot+'%s'%url

from crawler import Crawler
from item import Item
from saver import Saver
import time
class Controller(object):
    """Drive a crawl: fetch a page, save its content, then follow its links.

    The collaborators (``Crawler``, ``Item``, ``Saver``) come from sibling
    modules whose behavior is not visible in this file; the notes below only
    describe how this class calls them.
    """

    def __init__(self):
        """Create the crawler, the two extraction items and the saver."""
        self.crawler = Crawler()
        # Picks the ``href`` of the anchor whose id is ``htmlxiazhang``
        # (presumably the "next chapter" link -- confirm against the site).
        # NOTE(review): the meaning of the leading ``1`` passed to Item is
        # not visible here -- confirm against item.py.
        self.urlItem = Item(1, 'a[id="htmlxiazhang"]', 'href')
        # Picks the ``div#htmlContent`` element; no attribute is requested.
        self.contentItem = Item(1, 'div#htmlContent', None)
        self.saver = Saver('b.html')
        # Callable that turns an extracted link into the URL to fetch next.
        self.urlProcessor = urlProcess

    def do(self, rule, url, count):
        """Fetch ``count`` numbered pages and save every match of ``rule``.

        Page URLs are built as ``url + "1"`` through ``url + str(count)``.

        rule  -- selector handed to ``Item`` to pick matches from each page
        url   -- URL prefix; the 1-based page number is appended to it
        count -- number of pages to fetch
        """
        item = Item(1, rule)
        saver = Saver('a.html')
        for pageNo in range(1, count + 1):
            # Reuse the shared crawler rather than building one per page.
            html = self.crawler.openUrl(url + str(pageNo))
            for match in item.getContent(html):
                saver.save2File(str(match))

    def followUrl(self, url, delay=2):
        """Fetch ``url``, save its content, then follow its links depth-first.

        Pages are visited in the same order a recursive walk would use, but
        an explicit stack is used so that a long chain of pages cannot hit
        the interpreter's recursion limit. A URL that was already fetched
        during this call is skipped, so a page linking back to an earlier
        one does not cause an endless loop.

        url   -- starting URL; ``None`` makes the call a no-op
        delay -- seconds to pause before every fetch after the first one
                 (defaults to 2); ``0`` or ``None`` disables the pause
        """
        pending = [url]
        visited = set()
        isFirst = True
        while pending:
            current = pending.pop()
            if current is None or current in visited:
                continue
            visited.add(current)
            # Throttle every request except the initial one.
            if not isFirst and delay and delay > 0:
                time.sleep(delay)
            isFirst = False

            html = self.crawler.openUrl(current)
            links = self.urlItem.getContent(html)
            for cont in self.contentItem.getContent(html):
                self.saver.save2File(str(cont))

            nextUrls = [self.urlProcessor(link) for link in links]
            # Push in reverse so the first link is popped (followed) first.
            pending.extend(reversed(nextUrls))

    def isTheLast(self):
        """Placeholder; not implemented yet and always returns ``None``."""
        pass
if __name__=='__main__':
    # Script entry point: start the crawl from a fixed page and keep
    # following whatever links the controller extracts from each page.
    startUrl='http://novel.hongxiu.com/a/787600/7852790.html'
    controller=Controller()
    controller.followUrl(startUrl)